// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2003 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/CompositeTagScanner.java,v $
// $Author: derrickoswald $
// $Date: 2005/04/10 23:20:44 $
// $Revision: 1.90 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.scanners;

import org.htmlparser.Attribute;
import org.htmlparser.Node;
import org.htmlparser.Tag;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

import java.util.Vector;

/**
 * The main scanning logic for nested tags. When asked to scan, this class
 * gathers nodes into a heirarchy of tags.
 */
public class CompositeTagScanner extends TagScanner {
    /**
     * Determine whether to use JVM or NodeList stack. This can be set to true
     * to get the original behaviour of recursion into composite tags on the JVM
     * stack. This may lead to StackOverFlowException problems in some cases
     * i.e. Windows.
     */
    private static final boolean mUseJVMStack = false;

    /**
     * Determine whether unexpected end tags should cause stack roll-up. This
     * can be set to true to get the original behaviour of gathering end tags
     * into whatever tag is open. This can be expensive, but should only be
     * needed in the presence of bad HTML.
     */
    private static final boolean mLeaveEnds = false;

    /**
     * Create a composite tag scanner.
     */
    public CompositeTagScanner() {
    }

    /**
     * Collect the children.
     * <p/>
     * An initial test is performed for an empty XML tag, in which case the
     * start tag and end tag of the returned tag are the same and it has no
     * children.
     * <p/>
     * If it's not an empty XML tag, the lexer is repeatedly asked for
     * subsequent nodes until an end tag is found or a node is encountered that
     * matches the tag ender set or end tag ender set. In the latter case, a
     * virtual end tag is created. Each node found that is not the end tag is
     * added to the list of children. The end tag is special and not a child.
     * <p/>
     * Nodes that also have a CompositeTagScanner as their scanner are recursed
     * into, which provides the nested structure of an HTML page. This method
     * operates in two possible modes, depending on a private boolean. It can
     * recurse on the JVM stack, which has caused some overflow problems in the
     * past, or it can use the supplied stack argument to nest scanning of child
     * tags within itself. The former is left as an option in the code, mostly
     * to help subsequent modifiers visualize what the internal nesting is
     * doing.
     *
     * @param tag   The tag this scanner is responsible for.
     * @param lexer The source of subsequent nodes.
     * @param stack The parse stack. May contain pending tags that enclose this
     *              tag.
     * @return The resultant tag (may be unchanged).
     */
    public Tag scan(Tag tag, Lexer lexer, NodeList stack) throws ParserException {
        Node node;
        Tag next;
        String name;
        Scanner scanner;
        Tag ret;

        ret = tag;

        if (ret.isEmptyXmlTag())
            ret.setEndTag(ret);
        else
            do {
                node = lexer.nextNode(false);
                if (null != node) {
                    if (node instanceof Tag) {
                        next = (Tag) node;
                        name = next.getTagName();
                        // check for normal end tag
                        if (next.isEndTag() && name.equals(ret.getTagName())) {
                            ret.setEndTag(next);
                            node = null;
                        } else if (isTagToBeEndedFor(ret, next)) // check DTD
                        {
                            // backup one node. insert a virtual end tag later
                            lexer.setPosition(next.getStartPosition());
                            node = null;
                        } else if (!next.isEndTag()) {
                            // now recurse if there is a scanner for this type
                            // of tag
                            scanner = next.getThisScanner();
                            if (null != scanner) {
                                if (mUseJVMStack) { // JVM stack recursion
                                    node = scanner.scan(next, lexer, stack);
                                    addChild(ret, node);
                                } else {
                                    // fake recursion:
                                    if (scanner == this) {
                                        if (next.isEmptyXmlTag()) {
                                            next.setEndTag(next);
                                            finishTag(next, lexer);
                                            addChild(ret, next);
                                        } else {
                                            stack.add(ret);
                                            ret = next;
                                        }
                                    } else { // normal recursion if switching scanners
                                        node = scanner.scan(next, lexer, stack);
                                        addChild(ret, node);
                                    }
                                }
                            } else
                                addChild(ret, next);
                        } else {
                            if (!mUseJVMStack && !mLeaveEnds) {
                                // Since all non-end tags are consumed by the
                                // previous clause, we're here because we have
                                // an
                                // end tag with no opening tag... this could be
                                // bad.
                                // There are two cases...
                                // 1) The tag hasn't been registered, in which
                                // case
                                // we just add it as a simple child, like it's
                                // opening tag
                                // 2) There may be an opening tag further up the
                                // parse stack that needs closing.
                                // So, we ask the factory for a node like this
                                // one
                                // (since end tags never have scanners) and see
                                // if it's scanner is a composite tag scanner.
                                // If it is we walk up the parse stack looking
                                // for
                                // something that needs this end tag to finish
                                // it.
                                // If there is something, we close off all the
                                // tags
                                // walked over and continue on as if nothing
                                // happened.
                                Vector attributes = new Vector();
                                attributes.addElement(new Attribute(name, null));
                                Tag opener = lexer.getNodeFactory().createTagNode(lexer.getPage(),
                                        next.getStartPosition(), next.getEndPosition(), attributes);

                                scanner = opener.getThisScanner();
                                if ((null != scanner) && (scanner == this)) {
                                    // uh-oh
                                    int index = -1;
                                    for (int i = stack.size() - 1; (-1 == index) && (i >= 0); i--) {
                                        // short circuit here... assume
                                        // everything on the stack has this as
                                        // it's scanner
                                        // we'll need to stop if either of those
                                        // conditions isn't met
                                        Tag boffo = (Tag) stack.elementAt(i);
                                        if (name.equals(boffo.getTagName()))
                                            index = i;
                                        else if (isTagToBeEndedFor(boffo, next)) // check
                                            // DTD
                                            index = i;
                                    }
                                    if (-1 != index) {
                                        // finish off the current one first
                                        finishTag(ret, lexer);
                                        addChild((Tag) stack.elementAt(stack.size() - 1), ret);
                                        for (int i = stack.size() - 1; i > index; i--) {
                                            Tag fred = (Tag) stack.remove(i);
                                            finishTag(fred, lexer);
                                            addChild((Tag) stack.elementAt(i - 1), fred);
                                        }
                                        ret = (Tag) stack.remove(index);
                                        node = null;
                                    } else
                                        addChild(ret, next); // default
                                    // behaviour
                                } else
                                    addChild(ret, next); // default behaviour
                            } else
                                addChild(ret, next);
                        }
                    } else {
                        addChild(ret, node);
                        node.doSemanticAction();
                    }
                }

                if (!mUseJVMStack) {
                    // handle coming out of fake recursion
                    if (null == node) {
                        int depth = stack.size();
                        if (0 != depth) {
                            node = stack.elementAt(depth - 1);
                            if (node instanceof Tag) {
                                Tag precursor = (Tag) node;
                                scanner = precursor.getThisScanner();
                                if (scanner == this) {
                                    stack.remove(depth - 1);
                                    finishTag(ret, lexer);
                                    addChild(precursor, ret);
                                    ret = precursor;
                                } else
                                    node = null; // normal recursion
                            } else
                                node = null; // normal recursion
                        }
                    }
                }
            } while (null != node);

        finishTag(ret, lexer);

        return (ret);
    }

    /**
     * Add a child to the given tag.
     *
     * @param parent The parent tag.
     * @param child  The child node.
     */
    protected void addChild(Tag parent, Node child) {
        if (null == parent.getChildren()) parent.setChildren(new NodeList());
        child.setParent(parent);
        parent.getChildren().add(child);
    }

    /**
     * Finish off a tag. Perhap add a virtual end tag. Set the end tag parent as
     * this tag. Perform the semantic acton.
     *
     * @param tag   The tag to finish off.
     * @param lexer A lexer positioned at the end of the tag.
     */
    protected void finishTag(Tag tag, Lexer lexer) throws ParserException {
        if (null == tag.getEndTag())
            tag.setEndTag(createVirtualEndTag(tag, lexer, lexer.getPage(), lexer.getCursor()
                    .getPosition()));
        tag.getEndTag().setParent(tag);
        tag.doSemanticAction();
    }

    /**
     * Creates an end tag with the same name as the given tag.
     *
     * @param tag      The tag to end.
     * @param lexer    The object containg the node factory.
     * @param page     The page the tag is on (virtually).
     * @param position The offset into the page at which the tag is to be anchored.
     * @return An end tag with the name '"/" + tag.getTagName()' and a start and
     *         end position at the given position. The fact these positions are
     *         equal may be used to distinguish it as a virtual tag later on.
     */
    protected Tag createVirtualEndTag(Tag tag, Lexer lexer, Page page, int position)
            throws ParserException {
        Tag ret;
        String name;
        Vector attributes;

        name = "/" + tag.getRawTagName();
        attributes = new Vector();
        attributes.addElement(new Attribute(name, (String) null));
        ret = lexer.getNodeFactory().createTagNode(page, position, position, attributes);

        return (ret);
    }

    /**
     * Determine if the current tag should be terminated by the given tag.
     * Examines the 'enders' or 'end tag enders' lists of the current tag for a
     * match with the given tag. Which list is chosen depends on whether tag is
     * an end tag ('end tag enders') or not ('enders').
     *
     * @param current The tag that might need to be ended.
     * @param tag     The candidate tag that might end the current one.
     * @return <code>true</code> if the name of the given tag is a member of the
     *         appropriate list.
     */
    public final boolean isTagToBeEndedFor(Tag current, Tag tag) {
        String name;
        String[] ends;
        boolean ret;

        ret = false;

        name = tag.getTagName();
        if (tag.isEndTag())
            ends = current.getEndTagEnders();
        else
            ends = current.getEnders();
        for (int i = 0; i < ends.length; i++)
            if (name.equalsIgnoreCase(ends[i])) {
                ret = true;
                break;
            }

        return (ret);
    }
}
